{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "undefined-enemy",
   "metadata": {},
   "outputs": [],
   "source": [
    "import re\n",
    "from functools import reduce\n",
    "from collections import Counter\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.metrics import accuracy_score"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "worse-sheet",
   "metadata": {},
   "source": [
    "### Pre-Processing"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "active-mileage",
   "metadata": {},
   "outputs": [],
   "source": [
    "batch_size = 300  # NOTE(review): actually the padded sequence length per review (used as input_length and the pad target), not a training batch size\n",
    "vocab_size = 20000  # vocabulary size including the 4 special tokens (<MASK>, <UNK>, <START>, <END>)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "bronze-diary",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>review</th>\n",
       "      <th>sentiment</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>One of the other reviewers has mentioned that ...</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>A wonderful little production. &lt;br /&gt;&lt;br /&gt;The...</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>I thought this was a wonderful way to spend ti...</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Basically there's a family where a little boy ...</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Petter Mattei's \"Love in the Time of Money\" is...</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                              review  sentiment\n",
       "0  One of the other reviewers has mentioned that ...          1\n",
       "1  A wonderful little production. <br /><br />The...          1\n",
       "2  I thought this was a wonderful way to spend ti...          1\n",
       "3  Basically there's a family where a little boy ...          0\n",
       "4  Petter Mattei's \"Love in the Time of Money\" is...          1"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = pd.read_csv('IMDB Dataset.csv')\n",
    "# encode the sentiment label as an integer: 'positive' -> 1, anything else -> 0\n",
    "df['sentiment'] = df['sentiment'].eq('positive').astype(int)\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "unnecessary-artist",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "average word count: 231.15694\n"
     ]
    }
   ],
   "source": [
    "print(f\"average word count: {np.mean(df['review'].apply(lambda x: len(x.split())))}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "coordinate-prayer",
   "metadata": {},
   "outputs": [],
   "source": [
    "# clean a string and extract its words\n",
    "def get_words(string):\n",
    "    # <br /> tags frequently appear in the reviews; drop them before tokenizing\n",
    "    string = string.lower().replace('<br />', ' ')\n",
    "    # keep only runs of ascii letters; digits and punctuation are discarded\n",
    "    return re.findall(r'[a-z]+', string)\n",
    "\n",
    "# build the vocabulary from the whole corpus\n",
    "text_corpus = ' '.join(df['review'])\n",
    "word_counts = Counter(get_words(text_corpus))\n",
    "\n",
    "# keep only the most frequent words (the full vocab would be ~100000),\n",
    "# reserving 4 slots for the special tokens; most_common replaces the manual sort/zip\n",
    "words = [word for word, _ in word_counts.most_common(vocab_size - 4)]\n",
    "words = ['<MASK>', '<UNK>', '<START>', '<END>'] + words  # special tokens occupy indexes 0-3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "burning-exploration",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "number of unique words: 20000\n"
     ]
    }
   ],
   "source": [
    "# build the word <-> index lookup tables for the vocabulary\n",
    "vocab_idx, idx_vocab = {}, {}\n",
    "for i, word in enumerate(words):\n",
    "    if word in vocab_idx:\n",
    "        continue  # defensive: if a word repeats, the first occurrence keeps its index\n",
    "    vocab_idx[word] = i\n",
    "    idx_vocab[i] = word\n",
    "print(f\"number of unique words: {len(vocab_idx)}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "vocational-throat",
   "metadata": {},
   "outputs": [],
   "source": [
    "# convert a string to a fixed-length array of vocab indexes:\n",
    "# unknown words map to 1 (<UNK>), the sequence is wrapped in 2 (<START>) / 3 (<END>),\n",
    "# and right-padded with 0 (<MASK>) up to batch_size\n",
    "def string_to_idxs(string):\n",
    "    idxs = [vocab_idx.get(word, 1) for word in get_words(string)]\n",
    "    # keep only the LAST batch_size-2 words so the end of the review is preserved\n",
    "    idxs = [2] + idxs[-(batch_size-2):] + [3]\n",
    "    padding = np.zeros(batch_size - len(idxs))  # pad value 0 == <MASK>\n",
    "    return np.concatenate([idxs, padding]).astype(int)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "ahead-export",
   "metadata": {},
   "outputs": [],
   "source": [
    "def index_to_string(idxes):\n",
    "    \"\"\"Decode a sequence of vocab indexes back into a space-joined string.\"\"\"\n",
    "    return ' '.join(idx_vocab[i] for i in idxes)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "recognized-anchor",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0        [2, 32, 3176, 35, 28, 207, 17, 13, 9, 616, 50,...\n",
       "1        [2, 6, 393, 123, 353, 4, 1370, 2952, 9, 56, 17...\n",
       "2        [2, 12, 194, 13, 16, 6, 393, 98, 8, 1141, 60, ...\n",
       "3        [2, 679, 42, 15, 6, 229, 118, 6, 123, 400, 332...\n",
       "4        [2, 1, 10477, 15, 113, 11, 4, 60, 7, 292, 9, 6...\n",
       "                               ...                        \n",
       "49995    [2, 12, 194, 13, 18, 120, 6, 179, 207, 52, 297...\n",
       "49996    [2, 77, 114, 77, 411, 77, 117, 3007, 961, 4, 6...\n",
       "49997    [2, 12, 233, 6, 3398, 4364, 11, 1, 8165, 5462,...\n",
       "49998    [2, 12, 141, 170, 8, 30, 8, 2983, 20, 4, 919, ...\n",
       "49999    [2, 59, 31, 5822, 4, 324, 2082, 101, 8, 32, 30...\n",
       "Name: idxed_review, Length: 50000, dtype: object"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# add a column holding each review as a fixed-length array of vocab indexes\n",
    "df['idxed_review'] = df['review'].map(string_to_idxs)\n",
    "df['idxed_review']"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "banned-termination",
   "metadata": {},
   "source": [
    "#### Build training and test set\n",
    "The training set has no cloze prompts and is used for the language-modelling task\n",
    "\n",
    "The test set keeps the cloze prompts and is used for the classification task"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "accepting-consultancy",
   "metadata": {},
   "outputs": [],
   "source": [
    "# stack the per-review index arrays into a single (n_reviews, batch_size) matrix\n",
    "review_array = np.stack(df['idxed_review'].to_list())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "virtual-conversion",
   "metadata": {},
   "outputs": [],
   "source": [
    "X_train, X_test, y_train, y_test = train_test_split(review_array, np.array(df['sentiment']), test_size=0.1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "noble-welding",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[ 155,   24,  183, ...,  311, 6829,    3],\n",
       "       [  31,    7,    1, ...,    0,    0,    0],\n",
       "       [  13,   31,  115, ...,   78,   13,    3],\n",
       "       ...,\n",
       "       [   6, 6828,    5, ...,    0,    0,    0],\n",
       "       [  11, 1386, 4951, ...,    4, 4708,    3],\n",
       "       [  13,   18,   16, ...,    0,    0,    0]])"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# the label for each word is the next word in the sequence; append a column of 0s\n",
    "# (the <MASK>/pad index) so y_train keeps the same width as X_train\n",
    "# (bugfix: np.ones((n, 0)) appended an EMPTY column block, leaving y_train one column short)\n",
    "# NOTE(review): this overwrites the sentiment labels produced by train_test_split above\n",
    "y_train = np.concatenate([X_train[:, 1:], np.zeros((X_train.shape[0], 1), dtype=int)], axis=1)\n",
    "y_train"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "imported-leader",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[   2,  155,   24, ...,  311, 6829,    3],\n",
       "       [   2,   31,    7, ...,    0,    0,    0],\n",
       "       [   2,   13,   31, ...,   78,   13,    3],\n",
       "       ...,\n",
       "       [   2,    6, 6828, ...,    0,    0,    0],\n",
       "       [   2,   11, 1386, ...,    4, 4708,    3],\n",
       "       [   2,   13,   18, ...,    0,    0,    0]])"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X_train"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "environmental-words",
   "metadata": {},
   "source": [
    "### Model Building\n",
    "- First train an LSTM as a language model, then have it solve closures to get the predictions\n",
    "- Because the data would be too large to fit in memory if we converted it to one hot, we have to instead train using a generator (takes integers representing words and outputs a one hot vector)\n",
    "- Cloze task to start with: \"X + Overall this movie was (poor/great)\"  (figure out how to do Bidirectional LSTM without it just predicting \\<MASK\\> later)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "apparent-scheduling",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# NOTE(review): these imports would normally live in the first import cell at the top\n",
    "import tensorflow as tf\n",
    "from tensorflow.keras import Sequential\n",
    "from tensorflow.keras.layers import Embedding, LSTM, Dense, TimeDistributed, Dropout\n",
    "tf.config.list_physical_devices('GPU') # check that a GPU is visible to TensorFlow"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "remarkable-television",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Generator class: yields one review and its one-hot next-word labels at a time\n",
    "class SampleSequence(tf.keras.utils.Sequence): # extend Sequence so that we can use multiprocessing\n",
    "    def __init__(self, X_train):\n",
    "        # X_train: (n_reviews, batch_size) matrix of vocab indexes\n",
    "        self.X = X_train\n",
    "        \n",
    "    def __len__(self):\n",
    "        # one sample per review\n",
    "        return self.X.shape[0]\n",
    "        \n",
    "    # get item at the index specified\n",
    "    def __getitem__(self, idx):\n",
    "        # pad_token: one-hot vector for index 0 (<MASK>), used as the final timestep's label\n",
    "        pad_token = np.zeros((1, vocab_size))\n",
    "        pad_token[0, 0] = 1\n",
    "        \n",
    "        x_sample = self.X[idx, :].reshape(1, batch_size) # the embedding layer automatically handles integers\n",
    "        y_sample = np.zeros((batch_size, vocab_size)) # the labels need to be manually converted to one hot\n",
    "        for i in range(batch_size - 1):\n",
    "            y_sample[i, self.X[idx, i+1]] = 1  # the label at step i is the word at step i+1\n",
    "        y_sample[-1, :] = pad_token # the last step has no next word, so its label is the pad token\n",
    "        y_sample = y_sample.reshape(1, batch_size, vocab_size)\n",
    "            \n",
    "        return (x_sample, y_sample)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "unexpected-extraction",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Do the same thing but with a tensorflow Dataset implementation, which allows parallelization\n",
    "# first define the mapping function to convert a tensor of X_train into an (X, y) sample\n",
    "pad_token = np.zeros((1, vocab_size))\n",
    "pad_token[0, 0] = 1\n",
    "pad_token = tf.constant(pad_token, dtype='float32')\n",
    "def text_to_sample(x_tensor): # takes a single sample and converts it to input text and output labels\n",
    "    y_one_hot = tf.one_hot(x_tensor, vocab_size, axis=-1) # axis=-1 means we get a tensor of shape (features, vocab_size)\n",
    "    y_one_hot = y_one_hot[1:, :] # shift back one\n",
    "    y_one_hot = tf.concat([y_one_hot, pad_token], axis=0) # add pad token at the end \n",
    "    x_tensor = tf.reshape(x_tensor, [1, batch_size])\n",
    "    return (x_tensor, y_one_hot)\n",
    "\n",
    "# create a Dataset over the rows of `data`, applying the mapping to build the pipeline\n",
    "def create_dataset(data):\n",
    "    dataset = tf.data.Dataset.from_tensor_slices(data)\n",
    "    dataset = dataset.map(text_to_sample)\n",
    "    return dataset\n",
    "\n",
    "training = create_dataset(X_train)\n",
    "testing = create_dataset(X_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "bibliographic-passage",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Model: \"sequential\"\n",
      "_________________________________________________________________\n",
      "Layer (type)                 Output Shape              Param #   \n",
      "=================================================================\n",
      "embedding (Embedding)        (None, 300, 500)          10000000  \n",
      "_________________________________________________________________\n",
      "lstm (LSTM)                  (None, 300, 500)          2002000   \n",
      "_________________________________________________________________\n",
      "lstm_1 (LSTM)                (None, 300, 500)          2002000   \n",
      "_________________________________________________________________\n",
      "dropout (Dropout)            (None, 300, 500)          0         \n",
      "_________________________________________________________________\n",
      "time_distributed (TimeDistri (None, 300, 20000)        10020000  \n",
      "=================================================================\n",
      "Total params: 24,024,000\n",
      "Trainable params: 24,024,000\n",
      "Non-trainable params: 0\n",
      "_________________________________________________________________\n"
     ]
    }
   ],
   "source": [
    "# Create the model, eventually this should be bi-directional\n",
    "# This architecture was borrowed from here: https://adventuresinmachinelearning.com/keras-lstm-tutorial/\n",
    "model = Sequential()\n",
    "# mask_zero=True makes downstream layers skip the 0 (<MASK>) padding positions\n",
    "model.add(Embedding(vocab_size, 500, input_length=batch_size, mask_zero=True))\n",
    "# default tanh/sigmoid activations with zero dropout keep the cuDNN fast path available\n",
    "model.add(LSTM(500, return_sequences=True,\n",
    "              activation='tanh', recurrent_activation='sigmoid',\n",
    "              dropout=0, recurrent_dropout=0))\n",
    "model.add(LSTM(500, return_sequences=True,\n",
    "              activation='tanh', recurrent_activation='sigmoid',\n",
    "              dropout=0, recurrent_dropout=0))\n",
    "model.add(Dropout(0.5))\n",
    "# one softmax over the vocabulary per timestep (was hard-coded 20000; use vocab_size for consistency)\n",
    "model.add(TimeDistributed(Dense(vocab_size, activation='softmax')))\n",
    "model.compile(loss='categorical_crossentropy', metrics=['categorical_accuracy'],\n",
    "              optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001, beta_1=0.9, beta_2=0.999))\n",
    "model.summary()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "id": "weekly-worker",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train for 45000 steps, validate for 5000 steps\n",
      "Epoch 1/50\n",
      "45000/45000 [==============================] - 3801s 84ms/step - loss: 3.2874 - categorical_accuracy: 0.1894 - val_loss: 3.2041 - val_categorical_accuracy: 0.1981\n",
      "Epoch 2/50\n",
      "45000/45000 [==============================] - 3750s 83ms/step - loss: 3.2624 - categorical_accuracy: 0.1928 - val_loss: 3.1838 - val_categorical_accuracy: 0.2009\n",
      "Epoch 3/50\n",
      "45000/45000 [==============================] - 3831s 85ms/step - loss: 3.2403 - categorical_accuracy: 0.1959 - val_loss: 3.1662 - val_categorical_accuracy: 0.2033\n",
      "Epoch 4/50\n",
      "45000/45000 [==============================] - 3836s 85ms/step - loss: 3.2213 - categorical_accuracy: 0.1987 - val_loss: 3.1509 - val_categorical_accuracy: 0.2059\n",
      "Epoch 5/50\n",
      "45000/45000 [==============================] - 3828s 85ms/step - loss: 3.2034 - categorical_accuracy: 0.2012 - val_loss: 3.1376 - val_categorical_accuracy: 0.2075\n",
      "Epoch 6/50\n",
      "45000/45000 [==============================] - 3824s 85ms/step - loss: 3.1869 - categorical_accuracy: 0.2034 - val_loss: 3.1276 - val_categorical_accuracy: 0.2092\n",
      "Epoch 7/50\n",
      "45000/45000 [==============================] - 3828s 85ms/step - loss: 3.1722 - categorical_accuracy: 0.2057 - val_loss: 3.1163 - val_categorical_accuracy: 0.2112\n",
      "Epoch 8/50\n",
      "45000/45000 [==============================] - 3836s 85ms/step - loss: 3.1585 - categorical_accuracy: 0.2077 - val_loss: 3.1093 - val_categorical_accuracy: 0.2125\n",
      "Epoch 9/50\n",
      "45000/45000 [==============================] - 3778s 84ms/step - loss: 3.1458 - categorical_accuracy: 0.2096 - val_loss: 3.1020 - val_categorical_accuracy: 0.2137\n",
      "Epoch 10/50\n",
      "45000/45000 [==============================] - 3801s 84ms/step - loss: 3.1335 - categorical_accuracy: 0.2112 - val_loss: 3.0953 - val_categorical_accuracy: 0.2150\n",
      "Epoch 11/50\n",
      "45000/45000 [==============================] - 3817s 85ms/step - loss: 3.1218 - categorical_accuracy: 0.2130 - val_loss: 3.0894 - val_categorical_accuracy: 0.2161\n",
      "Epoch 12/50\n",
      "45000/45000 [==============================] - 3835s 85ms/step - loss: 3.1107 - categorical_accuracy: 0.2147 - val_loss: 3.0850 - val_categorical_accuracy: 0.2170\n",
      "Epoch 13/50\n",
      "45000/45000 [==============================] - 3859s 86ms/step - loss: 3.1000 - categorical_accuracy: 0.2163 - val_loss: 3.0812 - val_categorical_accuracy: 0.2173\n",
      "Epoch 14/50\n",
      "45000/45000 [==============================] - 3884s 86ms/step - loss: 3.0902 - categorical_accuracy: 0.2177 - val_loss: 3.0781 - val_categorical_accuracy: 0.2181\n",
      "Epoch 15/50\n",
      "45000/45000 [==============================] - 3823s 85ms/step - loss: 3.0810 - categorical_accuracy: 0.2191 - val_loss: 3.0753 - val_categorical_accuracy: 0.2187\n",
      "Epoch 16/50\n",
      "45000/45000 [==============================] - 3886s 86ms/step - loss: 3.0717 - categorical_accuracy: 0.2206 - val_loss: 3.0733 - val_categorical_accuracy: 0.2192\n",
      "Epoch 17/50\n",
      "45000/45000 [==============================] - 3934s 87ms/step - loss: 3.0627 - categorical_accuracy: 0.2219 - val_loss: 3.0721 - val_categorical_accuracy: 0.2196\n",
      "Epoch 18/50\n",
      "45000/45000 [==============================] - 3946s 88ms/step - loss: 3.0539 - categorical_accuracy: 0.2230 - val_loss: 3.0718 - val_categorical_accuracy: 0.2197\n",
      "Epoch 19/50\n",
      "45000/45000 [==============================] - 3948s 88ms/step - loss: 3.0456 - categorical_accuracy: 0.2244 - val_loss: 3.0706 - val_categorical_accuracy: 0.2201\n",
      "Epoch 20/50\n",
      "45000/45000 [==============================] - 3950s 88ms/step - loss: 3.0374 - categorical_accuracy: 0.2256 - val_loss: 3.0694 - val_categorical_accuracy: 0.2203\n",
      "Epoch 21/50\n",
      "45000/45000 [==============================] - 3955s 88ms/step - loss: 3.0292 - categorical_accuracy: 0.2267 - val_loss: 3.0701 - val_categorical_accuracy: 0.2204\n",
      "Epoch 22/50\n",
      "45000/45000 [==============================] - 3954s 88ms/step - loss: 3.0214 - categorical_accuracy: 0.2278 - val_loss: 3.0696 - val_categorical_accuracy: 0.2208\n",
      "Epoch 23/50\n",
      "45000/45000 [==============================] - 3954s 88ms/step - loss: 3.0134 - categorical_accuracy: 0.2290 - val_loss: 3.0698 - val_categorical_accuracy: 0.2209\n",
      "Epoch 24/50\n",
      "45000/45000 [==============================] - 3986s 89ms/step - loss: 3.0057 - categorical_accuracy: 0.2302 - val_loss: 3.0711 - val_categorical_accuracy: 0.2208\n",
      "Epoch 25/50\n",
      "45000/45000 [==============================] - 3960s 88ms/step - loss: 2.9982 - categorical_accuracy: 0.2312 - val_loss: 3.0726 - val_categorical_accuracy: 0.2211\n",
      "Epoch 26/50\n",
      "45000/45000 [==============================] - 3961s 88ms/step - loss: 2.9911 - categorical_accuracy: 0.2324 - val_loss: 3.0740 - val_categorical_accuracy: 0.2209\n",
      "Epoch 27/50\n",
      "45000/45000 [==============================] - 3961s 88ms/step - loss: 2.9836 - categorical_accuracy: 0.2334 - val_loss: 3.0757 - val_categorical_accuracy: 0.2207\n",
      "Epoch 28/50\n",
      "45000/45000 [==============================] - 3960s 88ms/step - loss: 2.9762 - categorical_accuracy: 0.2345 - val_loss: 3.0773 - val_categorical_accuracy: 0.2208\n",
      "Epoch 29/50\n",
      "45000/45000 [==============================] - 3962s 88ms/step - loss: 2.9696 - categorical_accuracy: 0.2353 - val_loss: 3.0800 - val_categorical_accuracy: 0.2210\n",
      "Epoch 30/50\n",
      "45000/45000 [==============================] - 3967s 88ms/step - loss: 2.9626 - categorical_accuracy: 0.2366 - val_loss: 3.0838 - val_categorical_accuracy: 0.2206\n",
      "Epoch 31/50\n",
      "45000/45000 [==============================] - 3957s 88ms/step - loss: 2.9555 - categorical_accuracy: 0.2376 - val_loss: 3.0870 - val_categorical_accuracy: 0.2209\n",
      "Epoch 32/50\n",
      "45000/45000 [==============================] - 3966s 88ms/step - loss: 2.9491 - categorical_accuracy: 0.2385 - val_loss: 3.0898 - val_categorical_accuracy: 0.2202\n",
      "Epoch 33/50\n",
      "45000/45000 [==============================] - 3969s 88ms/step - loss: 2.9424 - categorical_accuracy: 0.2394 - val_loss: 3.0939 - val_categorical_accuracy: 0.2202\n",
      "Epoch 34/50\n",
      "45000/45000 [==============================] - 3967s 88ms/step - loss: 2.9360 - categorical_accuracy: 0.2404 - val_loss: 3.0962 - val_categorical_accuracy: 0.2204\n",
      "Epoch 35/50\n",
      "45000/45000 [==============================] - 3968s 88ms/step - loss: 2.9300 - categorical_accuracy: 0.2414 - val_loss: 3.0994 - val_categorical_accuracy: 0.2201\n",
      "Epoch 36/50\n",
      "45000/45000 [==============================] - 3970s 88ms/step - loss: 2.9234 - categorical_accuracy: 0.2422 - val_loss: 3.1047 - val_categorical_accuracy: 0.2195\n",
      "Epoch 37/50\n",
      "45000/45000 [==============================] - 3962s 88ms/step - loss: 2.9171 - categorical_accuracy: 0.2432 - val_loss: 3.1072 - val_categorical_accuracy: 0.2193\n",
      "Epoch 38/50\n",
      "45000/45000 [==============================] - 3971s 88ms/step - loss: 2.9113 - categorical_accuracy: 0.2442 - val_loss: 3.1128 - val_categorical_accuracy: 0.2193\n",
      "Epoch 39/50\n",
      "45000/45000 [==============================] - 3967s 88ms/step - loss: 2.9051 - categorical_accuracy: 0.2451 - val_loss: 3.1168 - val_categorical_accuracy: 0.2188\n",
      "Epoch 40/50\n",
      "45000/45000 [==============================] - 3967s 88ms/step - loss: 2.8989 - categorical_accuracy: 0.2459 - val_loss: 3.1202 - val_categorical_accuracy: 0.2184\n",
      "Epoch 41/50\n",
      "45000/45000 [==============================] - 3970s 88ms/step - loss: 2.8927 - categorical_accuracy: 0.2468 - val_loss: 3.1261 - val_categorical_accuracy: 0.2181\n",
      "Epoch 42/50\n",
      "45000/45000 [==============================] - 3964s 88ms/step - loss: 2.8871 - categorical_accuracy: 0.2477 - val_loss: 3.1295 - val_categorical_accuracy: 0.2176\n",
      "Epoch 43/50\n",
      "45000/45000 [==============================] - 3960s 88ms/step - loss: 2.8812 - categorical_accuracy: 0.2486 - val_loss: 3.1361 - val_categorical_accuracy: 0.2173\n",
      "Epoch 44/50\n",
      "45000/45000 [==============================] - 3968s 88ms/step - loss: 2.8761 - categorical_accuracy: 0.2494 - val_loss: 3.1395 - val_categorical_accuracy: 0.2170\n",
      "Epoch 45/50\n",
      "45000/45000 [==============================] - 3991s 89ms/step - loss: 2.8702 - categorical_accuracy: 0.2504 - val_loss: 3.1419 - val_categorical_accuracy: 0.2168\n",
      "Epoch 46/50\n",
      "45000/45000 [==============================] - 4192s 93ms/step - loss: 2.8651 - categorical_accuracy: 0.2511 - val_loss: 3.1518 - val_categorical_accuracy: 0.2167\n",
      "Epoch 47/50\n",
      "45000/45000 [==============================] - 4200s 93ms/step - loss: 2.8596 - categorical_accuracy: 0.2519 - val_loss: 3.1560 - val_categorical_accuracy: 0.2162\n",
      "Epoch 48/50\n",
      "45000/45000 [==============================] - 3833s 85ms/step - loss: 2.8546 - categorical_accuracy: 0.2528 - val_loss: 3.1646 - val_categorical_accuracy: 0.2160\n",
      "Epoch 49/50\n",
      "45000/45000 [==============================] - 4370s 97ms/step - loss: 2.8490 - categorical_accuracy: 0.2535 - val_loss: 3.1693 - val_categorical_accuracy: 0.2158\n",
      "Epoch 50/50\n",
      "45000/45000 [==============================] - 4598s 102ms/step - loss: 2.8443 - categorical_accuracy: 0.2543 - val_loss: 3.1700 - val_categorical_accuracy: 0.2153\n"
     ]
    }
   ],
   "source": [
    "# With the cuDNN implementation it runs faster, though startup takes a while\n",
    "# NOTE(review): use_multiprocessing/max_queue_size/workers are options for Sequence inputs;\n",
    "# with a tf.data.Dataset they are likely ignored — confirm against the installed Keras version\n",
    "history = model.fit(x=create_dataset(X_train), validation_data=create_dataset(X_test), epochs=50, \n",
    "                    use_multiprocessing=True, # allows multiple cpu cores to work on generating data samples at once\n",
    "                    max_queue_size=50, # size of the queue of training items the cpu will create\n",
    "                    workers=4) # number of processes that will generate items"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "treated-paraguay",
   "metadata": {},
   "outputs": [],
   "source": [
    "# uncomment the save line to persist the trained weights;\n",
    "# the load line below restores a previously trained model (replaces the in-memory one)\n",
    "# model.save(\"language_model.h5\")\n",
    "model = tf.keras.models.load_model(\"language_model.h5\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "military-defeat",
   "metadata": {},
   "outputs": [],
   "source": [
    "preds = model.predict(create_dataset(X_test[:100]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "appropriate-friend",
   "metadata": {},
   "outputs": [],
   "source": [
    "labels = X_test[:1000, 1:300]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "id": "false-titanium",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'<START> it has been years since it s original run i would have hoped by now some marketing wizard would have promoted a live actor version of this classic by now or at least sought to re release the original episodes i can t fathom why the sci fi or cartoon network haven t snapped this up galaxy rangers actually had well thought out plots and even better scripts the animation was above average quality for it s time and excellent when compared to the talking slide show japanese animation of today it <UNK> the heavy toon toy tie in market this may have sealed it s doom too i would willingly spend cash on a dvd of gr if available <END> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK>'"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# pick one test review and decode it back to words\n",
    "item = 2\n",
    "index_to_string(X_test[item])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "id": "advisory-bumper",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'i s been a since i was a and i m like to it the that of and <UNK> have been a <UNK> action of of the film in the i even least a to be <UNK> it original version and have t wait how the show fi channel sci network would t been it up and <UNK> is were a to out and and i the films and <UNK> was <UNK> average and and the s time and the acting i to the original <UNK> show the series of the s was the <UNK> <UNK> <UNK> <UNK> in the i is be been the s <UNK> it it m recommend recommend the on a dvd of the s you on <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK> <MASK>'"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "index_to_string(np.argmax(preds[item], axis=1))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "id": "dense-joshua",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([    0, 18562, 16679, 19721, 18461,  9055, 19774, 17816, 19892,\n",
       "       19319, 19434, 19920, 18342, 17951,     2, 19876, 16874, 17853,\n",
       "       18764, 19919], dtype=int64)"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.flip(np.argsort(preds[58, 100]))[:20]"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "reduced-reply",
   "metadata": {},
   "source": [
    "### Closure Creation \n",
    "Analyse performance on the test set given by solving closure problems"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "id": "diagnostic-router",
   "metadata": {},
   "outputs": [],
   "source": [
    "closures = np.zeros(X_test.shape, dtype='int')\n",
    "prompt = \"Overall the movie was\"\n",
    "for i in range(X_test.shape[0]):\n",
    "    review = index_to_string(X_test[i]).replace('<MASK> ', '').replace('<END> ', '').strip() # convert to string, removing the <END> and <MASK> tokens\n",
    "    closures[i, :] = string_to_idxs(review + \" \" + prompt)\n",
    "closures = create_dataset(closures)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "id": "activated-authentication",
   "metadata": {},
   "outputs": [],
   "source": [
    "# we need to perform prediction in steps as otherwise the output doesn't fit in memory\n",
    "def generator_predict(closures):\n",
    "    for data in closures:\n",
    "        # find the last index that's not a zero in the input data (by looping back from the end to the first non-zero entry)\n",
    "        # then use that to get the prediction at the end\n",
    "        for i in range(batch_size - 1, -1, -1):\n",
    "            if data[0][0, i] != 0: break # data[0] is the input sequence, data[0][0 - the first sample, i - ith word]\n",
    "        \n",
    "        # take the prediction for the last word in the text before masking tokens\n",
    "        pred = model.predict(data)[0, i - 1, :] # i is the position of the end token 3, take the prediction for 1 further back\n",
    "        yield pred"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "id": "convertible-faculty",
   "metadata": {},
   "outputs": [],
   "source": [
    "test = closures.as_numpy_iterator()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "id": "solved-switch",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(array([[    2,  2463,  9856,   261,   118,   120,    27,   845,     8,\n",
       "            81,  1009,     5,    92,     7,    95,    30,    77,   211,\n",
       "             1,    17,    72,     4,  1310,     1,    33,     1,    69,\n",
       "             1,   196,     8,    97,    23,   422,     4,    52,   211,\n",
       "          5735,  2807,    80,   182,     6,  2700,  3208,  3606, 11481,\n",
       "           838,    11,    29,  4876,    44,  2573,    10,    46,     5,\n",
       "          1595,   307,     9,   778,     4,  2761,    20,    59,  5791,\n",
       "            34,    33,  7145,  1493,   980,   935,    12,   105,   285,\n",
       "          5158,     8,     6,    11,     4,  4036,    64,    27,  2555,\n",
       "            46,    36, 10620,     6,     1,   187,    29,  3843,  4036,\n",
       "             5,  8624,    27,  1165,    49,     6,  7812,   443,  5237,\n",
       "          1838,    11,    64,    27, 17862,  2595,  5850,     8,     4,\n",
       "           388,    17,    72,    17,   112,   490,     1,    91,    86,\n",
       "             6,  1256,  1932,     6,   768,  5947,    40,    50,   290,\n",
       "            41,     4,   862,  1831,  1593,     1,   210,     4,  3904,\n",
       "          1998,     7,     6,  2287, 15821,    43,   102,    80,   143,\n",
       "             1,     1,     4,   228,    49,   616,  1020,   409,     4,\n",
       "           369,     7,     4,    60,    10,    15,    33,    39,   749,\n",
       "             5,   350,   568,    10,    15,    26,    33,    77,  9856,\n",
       "           128,   729,     6,   757,   697,     8,    29,   177,     5,\n",
       "             9,  4083,     4,   121,   281,    11,    13,   485,     7,\n",
       "           266,    14,   155,    24,   135,    76,    21,    27,    53,\n",
       "            81,     4, 13114,  1008,   210, 10780,     8,     4,  4484,\n",
       "          3104,    92,     7,     4,  1923,   359,   132,    27,  1521,\n",
       "            29,   140,     1,     1,     5,     1,   535,   409,    20,\n",
       "            51,  4091,    21,    23,    28,   215,  1899,    23,    28,\n",
       "           152,  1433,  9856,   117,     4,  1008,    13,    18,     9,\n",
       "           370,    37,   373,   450,     7,    60,     4,   117,   143,\n",
       "            17,    10,     9,     9,   431,     7,    15,   101,  1635,\n",
       "           345,  2775,   896,  1369,   869,    15,    56,   526, 11402,\n",
       "           550,   138,    21,    10,    15,    26,    62,  1698,   724,\n",
       "            17,    52,    17,   349,     6,  2416,  1794,    43,     6,\n",
       "          1369,   869,    22,    89,    24,  1246,   131,   439,     4,\n",
       "            18,    16,     3]]),\n",
       " array([[0., 0., 0., ..., 0., 0., 0.],\n",
       "        [0., 0., 0., ..., 0., 0., 0.],\n",
       "        [0., 0., 0., ..., 0., 0., 0.],\n",
       "        ...,\n",
       "        [0., 0., 0., ..., 0., 0., 0.],\n",
       "        [0., 0., 0., ..., 0., 0., 0.],\n",
       "        [1., 0., 0., ..., 0., 0., 0.]], dtype=float32))"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "next(test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "id": "accepted-excess",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([6.4843495e-09, 6.8171727e-03, 1.8615570e-09, ..., 2.3755327e-08,\n",
       "       4.7915944e-07, 2.1847498e-08], dtype=float32)"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "next(generator_predict(closures))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "id": "advisory-contrary",
   "metadata": {},
   "outputs": [],
   "source": [
    "# calculate the predicted probabilities\n",
    "pred_probas = [] # the probability that the next word is \"Good\" if it must by Good or Bad\n",
    "for i, pred in enumerate(generator_predict(closures)):\n",
    "    good_prob = pred[vocab_idx['good']] # great\n",
    "    bad_prob = pred[vocab_idx['bad']]   # poor\n",
    "    total = good_prob + bad_prob\n",
    "    pred_probas.append(good_prob/(good_prob + bad_prob)) "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "id": "joined-gross",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Find the cutoff that gives the highest accuracy\n",
    "pred_probas = np.array(pred_probas)\n",
    "c_range = np.arange(0, 1, 0.001)\n",
    "accuracies = []\n",
    "for cutoff in c_range:\n",
    "    class_preds = (pred_probas > cutoff).astype(int)\n",
    "    accuracies.append(accuracy_score(y_test, class_preds))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "id": "experienced-harrison",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYgAAAEWCAYAAAB8LwAVAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8vihELAAAACXBIWXMAAAsTAAALEwEAmpwYAAA2YklEQVR4nO3dd3gc5bX48e9Rr5as4ibJkrtxx5ZtTOjVFIfQgikh4RIISUiHC8mPAJfkBhLSA1yHEEJoNjg0Q0wLvRkX3HuXZdlWcZEsW5KlPb8/ZmTWYmWtZK1md3U+z6NHuzPzzpzZnZ0z78w784qqYowxxrQU43UAxhhjwpMlCGOMMQFZgjDGGBOQJQhjjDEBWYIwxhgTkCUIY4wxAVmC6CIi8ksRqRSRne77i0Vkm4jsF5HjPYyrQ3GIyN0i8mQoY2tluf3dWGO7etmdRRz/EJE9IjLfHfZtEdnlrlu21zEaA5YgOo2IbBGRg+4PvPnvAXdcAfATYISq9nGL/Ba4WVXTVHXxMSxXRWTwMYTeKXF0FVUtcWNt8jqWZh1IlicBZwP5qjpJROKB3wPnuOtWFZJAARH5hoh82Enz2iIiZ3XGvFrM15ODD/NFcV4HEGWmqep/AgwvBKpUtbzFsJVdE9ZRhUsc3UkhsEVVa933vYEk7HuICCIigKiqz+tYQk5V7a8T/oAtwFkBhp8FHAR8wH5gpvtfgVpgoztdP+A5oALYDHzfbx6xwM+AjUANsAgoAN73m89+4IoAy48B7gC2AuXA40AGkBgojgDlRwJvAruBXcDP3OF3A8+686vB2bkV+5W73S/eVcDFfuO+AXyIU3vZ467veX7jB7jrVgP8B3gQeNIdV+TGHOe+fxf4BfCRO/0bQI7fvK51170K+Hlr35M7bTLwO3f6fW6MycBpQGmg7xuYCjQAh9zPc6nf9znH/dw2ADe4w68H6oAmv+2h1l2n/cDbrcT2Zfcz3uuu83EtYrkFWObG/QyQFGAex7VY9l53eKL7XZS43/EMINkdlwO84i53N/ABzjb1BM42fdCd138HWF7Askfb3lv7PAPMu9Xtyx1/A7Dab/x4d3gB8Ly73CrgAb/t+Um/8kV8cTv7X5zt7CAwGLjObxmbgG+1iOEiYAlQ7cY6FbgcWNRiup8AL3q9Dwv4OXsdQLT8cfQdz2l8cQejwGD3dQzOTv9OIAEY6G5w57rjbwWWA8MAAcYC2S3n08qy/wtnBzUQSHN/HE8EiiNA2XRgh7sBJ7nvJ7vj7sbZ2ZyPk8DuBeb5lb3c3QnEAFfg7AT7uuO+4e4AbnDLfhsowzkqA/gEZ4eVgHM6ppqjJ4iNwFCcnfm7wH3uuBE4O5mT3Hn91l1ua9/Tg275PDeuE3F2noG+v8PfNy12Lu6w94CH3M9tHM4O6Uy/9f/Qb9oj1ilAXEPdz+9sIB74b/c7TfCLZb77eWfh7LRuamVeRyzbHfZHnGSW5X7HLwP3uuPuxUkY8e7fyX7f0+HPoJVlBSxL29v7Fz7PAPM+2vZ1ObAdmOgubzBOrS0WWAr8AUh1v5uTAi2z5XfibhclOAdMce76XAAMcpdxKnCAzxPRJJxkfbYbYx4wHGd72s2RCX4xcKnX+7CAn7PXAUTLn/tj2Y9ztNT813zUeBpHTxCTgZIW438K/MN9vRa4qJXltpUg3gK+4/d+GM5OMq6t8sCVwOJWxt0N/Mfv/Qjg4FHiWNK8Djg7qQ1+41LcOPoA/YFGIMVv/JMcPUHc4Tftd4DX3Nd3AjNbLKeBwDW9GJwjw7EBxgX6/rbQSoLAOUptAtL9ht0LPOa3/u1JED8Hnm0R63bgNL9YrvEb/xtgRivzarlswdm5DvIbNgXY7L6+B3gp0DZC2wkiYFna3t6P+DyD/P35b1+vAz8IMM0UnET9hc85wHcYaDu7p40YXmxeLvBX4A+tTPd/wP+6r0fi1KIT27O+XfVnF6k711dUNdPv729BlisE
+onI3uY/nFNKvd3xBThHyR3RD+eUSbOtOEdAvQNPfoS2lrvT7/UBIElE4gBE5FoRWeK3PqNwTjl8oayqHnBfprnx7vYbBrCtjThbxpHmvu7nX9adZ2sXgHNwjig7+jn7a16HGr9hW3GOIjs6v8PfoTrnvre1mF9rn0FbcnES5yK/7+o1dzjA/Ti1lTdEZJOI3N6OuFsr29b23qY2tq/WttsCYKuqNrZjHfwdsR2KyHkiMk9EdrsxnB9EDAD/BK5yr2V8DSf513cwppCyi9ThYRvOEduQo4wfBKzowLzLcH6QzZqP0HcFGdeV7V2giBQCfwPOBD5R1SYRWYJztNqWHUCWiKT4JYmC9sbgN69hfnElA601Ia3EOWU2COc0hL9anJ1o83xi+XwHCs6Rpr8ynHVI90sS/XGO+juiDBjtt3zB+Uw6Mr+WsVbi1JxGquoX5ufG/xPgJyIyEnhHRBao6lsB5hVUWdre3o863yC2r+bfS0vbgP4iEhcgSRzxHePUZluNS0QSca6hXAu8pKqHROTFIGJAVeeJSAPOKber3L+wZDWI8DAfqBaR20QkWURiRWSUiEx0xz8C/EJEhrht6Mf4tZXfhXMOtzUzgR+JyAARSQN+BTwT5FHUK0AfEfmhiCSKSLqITA6iXCrOj6kCQESuwznCa5OqbgUWAneLSIKITAGmBVM2gH8B00TkRBFJAP6HVpKUe1T+KPB7EennfgdT3B3BOpza0QVuk9Q7cM4lN9sFFIlIjDuvbcDHwL0ikiQiY3AuTj/VwfV4FrhARM50l/8ToN5dRnvtAvLdz6N5vf8G/EFEegGISJ6InOu+vlBEBrtJqRrn1FmT37xa3faOUrat7f2IzzOAtravR4BbRGSC+3sZ7CaV+TgHDfeJSKr73XzJLbMEOEWc+2wycE55HU0CzjZQATSKyHnAOX7j/w5c535nMe5nOtxv/OPAA0CjqnZKs+NQsATRuV6WI++DeCGYQuq06Z+GczFzM85R3SM4rY3AaSP/LE4LnWqcjS/ZHXc38E+3qv3VALN/FKfFyfvuvOuA7wUZVw3ORbZpOKcw1gOnB1FuFU5roE9wfuyjcVp/BOtqnPPFVcAvcVrltLsKrqorcdZ1Fs6OoQanJVdr87oFpzHAApwLib/GaXWzD+faxiM4R+21QKlfudnu/yoR+cx9fSXOeewy4AXgLlV9s73r4K7HWuAa4C8428Y0nCbVDR2Y3ds4raF2ikilO+w2nFNB80SkGqflWHPNa4j7fj/O9/mQqr7rjrsXuMPd9m4JsKyAZYPY3gN9noe1tX2p6mycFkdP43znLwJZfssdjHPBuRTnAjfud/MMTkuwRTgHR61yfxvfx/ld7sGpBczxGz8fp5XTH3AuVr/HkTX5J3CS2hNHW47XmlsjGBO2ROQZYI2q3nWM80nDaTwwRFU3d0ZsxnSEe7qzHKfV03qv42mN1SBM2BGRiSIyyK2aT8VpT/5iB+c1TURSRCQVp5nrcpzWN8Z46dvAgnBODmAXqU146oNzv0Y2zmmAb2vHHwNyEU41XnCubUxXqzYbD4nIFpzt8SveRtI2O8VkjDEmIDvFZIwxJqCoOsWUk5OjRUVFXodhjDERY9GiRZWqmhtoXFQliKKiIhYuXOh1GMYYEzFEZGtr4+wUkzHGmIAsQRhjjAnIEoQxxpiALEEYY4wJyBKEMcaYgCxBGGOMCcgShDHGmIAsQRhjosqG8hqWle5l5vwSfL4juvo07RRVN8oZY7qvNTurmb2wlL9/+PmT3H/6/HIA0hPjuHRCPgu37mbamH6kJ8Vzyfg8DjQ0UVvfSEFWSmuz7dai6mF9xcXFandSG9P93PfqGma893kX0EN6OV1yry/ff9RycTFCo0+JixHunDaCr51QiNMBXvchIotUtTjQOKtBGGMiUpNPWbJtL//vheWs2el0/f2/F4/iq8UFxMceefa8odFH5f56tu89yCMfbKKmrpGPN1bR6HMOkBt9yp0v
rQTgmsmFxMR0ryTRGksQxpiIs6e2gdN++y77Dh46POyV753EqLyMgNMnxMXQLzOZfpnJTCzKQlV5e0054woySU6I5aMNVdz10grufGkl2/ce5KfnHddVqxLW7CK1MSbi3PbcMvYdPERqQiy//+pY1vxiaqvJIRAR4czjepOdlkhKQhxnj+jNt04dBMBf39tkF7VdliCMMRHl1eU7eGPVLqYMzGbRz8/mkvH5JMXHHvN8v35iET88awgAc5aWHfP8ooElCGNMxGhs8vGnt9aTl5nMP66b2CmJwd/Npw/m+P6Z/Oz55ZRUHejUeUciSxDGmIjxl7c3sGZnDdd9qajTkwNAXGwM914ymtqGJmYtKOn0+UcaSxDGmIgwZ2kZf357PZccn8f1Jw0I2XKG9+nBlIHZvLysjPrGppAtJxJYgjDGhL17Xl7F92cuZkTfHvzqktEhv1fhqsn92bb7ID97fkVIlxPuLEEYY8LSgYZG7nhxOWf89l0e/Wgz543qw5PXTw7JqaWWLhzTl4E5qbywuJRd1XUhX164sgRhjAk7mytrGXHn6zw5r4RNlbUMyEnl5xeOoGdqQpcsX0R49BsT8SnMmr+tS5YZjuxGOWNMWJk1v4Tb3WcofevUgVx/0gB6pSd1eRxFOamcMjSXmfNL+O7pg4iL7X7H0yFdYxGZKiJrRWSDiNweYHyGiLwsIktFZKWIXBdsWWNM9Kk71MRvXl+LCPznx6fy0/OO8yQ5NLtmcn92Vtfxn9XlnsXgpZAlCBGJBR4EzgNGAFeKyIgWk30XWKWqY4HTgN+JSEKQZY0xUebZhdvYXdvA0988gcHuA/e8dMbwXvTLSOKpT7d6HYonQlmDmARsUNVNqtoAzAIuajGNAuniNElIA3YDjUGWNcZEka1Vtfxq7mrG98/khIFZXocDOPdFXDohnw/WV1Jb3+h1OF0ulAkiD/C/ulPqDvP3AHAcUAYsB36gqr4gyxpjokRJ1QG+9cQi4mJiePDq8WH1yO2R/ZxnPDU/MbY7CWWCCPQNt3wC1rnAEqAfMA54QER6BFnWWYjIjSKyUEQWVlRUdDxaY4wnyqvrOOX+d1izs4bfXDaGvhnJXod0hPGFmSTFx/CvRd2vNVMoE0QpUOD3Ph+npuDvOuB5dWwANgPDgywLgKo+rKrFqlqcm5vbacEbY0KvpOoANz25CIBfXzqa80f39TiiL+qVnsSZw3szc/42VpVVex1OlwplglgADBGRASKSAEwH5rSYpgQ4E0BEegPDgE1BljXGRLBXl+/g7D+8x2cle/nRWUO5YmJ/r0Nq1c8uOI64GOGX/17ldShdKmT3Qahqo4jcDLwOxAKPqupKEbnJHT8D+AXwmIgsxzmtdJuqVgIEKhuqWI0xXatyfz23P7+c4X3Suf/ysQztne51SEeVl5lM/6wUPt5YxbLSvYzJz/Q6pC4R0hvlVHUuMLfFsBl+r8uAc4Ita4yJDg+9s5Ha+kZ+99WxDO4V3smh2R+nj+PLD3zEqyt2dpsE0f1uDTTGeGp3bQOzF23j/NF9IyY5AIzJz2RCYU+eWbCt2zzl1RKEMabL1B1q4rbnllF3qInvnD7I63Da7cpJ/dld28CG8v1eh9IlLEEYY7rMLbOX8uaqXdw2dTjD+/TwOpx2m1DYE4DPtu7xOJKuYQnCGBNyNXWHOP9PH/DKsh3cdOogvnnyQK9D6pCi7BQG5aby7+U7vA6lS1iCMMaE1EtLtjP5V2+xakc1XzuhkFvPHeZ1SB0mIpw7sg/zN+/mYEP0X4ewx30bY0Lm442V/PCZJRxfkMl3ThvMWSN6ex3SMRtXkIlPYdWO6sOnnKKVJQhjTEisLNvHVX/7lLzMZJ765gkkJ4S+J7iu0NzEdXnp3qhPEHaKyRgTEr95bS3J8bE88vXiqEkOAL17JJKbnsiy7fu8DiXkLEEYYzrdul01vLeugpvPGMxxfSOvtdLRiAhj8jJYYQnCGGPa72/vbyIuRrh8Qr7X
oYTEqLwMNpTvj/o+IixBGGM61btry5m9qJRvnjyQXj286y40lEbnZeBTWLMzup/uagnCGNOpHvt4C3mZyfzo7CFehxIyw/o4jwhZuzO676i2BGGM6TTLSvfy0YZKzhvVh8S46Lkw3VJ+z2RSE2JZazUIY4wJzj0vryIjOYEbT4nMO6WDJSIM7ZPO6h3R3Q2pJQhjTKd4acl2Fm7dw/fOGBy11x78TejfkyWle6k7FL13VFuCMMYcs5q6Q9z50koG5qby1eKCtgtEgRMHZ9PQ6IvqB/dZgjDGHLNfvrKafQcP8acrjo+qm+KOZtKAbGJjhI82VnodSshYgjDGHJOFW3bzzMJtXDI+j9H5GV6H02XSEuMYm5/BxxurvA4lZCxBGGM6rHJ/Pd+buZiCrGTu/vJIr8PpclMGZbOsdF/UXoewBGGM6bBfvrKKqtoG/u/qCfRIivc6nC43sl8GTT6N2h7mLEEYYzqkvKaOF5eUcd2XihiV131OLflrvmFuzc7obO5qCcIY0yELNjutd84Y1svjSLxTlJ1KYlxM1N4wF9IEISJTRWStiGwQkdsDjL9VRJa4fytEpElEstxxW0RkuTtuYSjjNMa0z8GGJu5/fQ05aYmM7Ka1B4DYGGFI7zSrQbSXiMQCDwLnASOAK0VkhP80qnq/qo5T1XHAT4H3VHW33ySnu+OLQxWnMab97nxpBVuqDvCn6eNIS+ze/Y4N693DEkQHTAI2qOomVW0AZgEXHWX6K4GZIYzHGNMJlpXuZfaiUq45oT9fGpzjdTieG94nnYqaenbXNngdSqcLZYLIA7b5vS91h32BiKQAU4Hn/AYr8IaILBKRG1tbiIjcKCILRWRhRUVFJ4RtjDma11bsJC5GuOWcYV6HEhY+v1AdfdchQpkgJMAwbWXaacBHLU4vfUlVx+OcovquiJwSqKCqPqyqxapanJube2wRG2OOau+BBp6eX8LEoiwyUxK8DicsDD/86O/oO80UygRRCvg/lCUfKGtl2um0OL2kqmXu/3LgBZxTVsYYDz3/2Xb2HjjELecO9TqUsJGbnkhOWgIrtlsNoj0WAENEZICIJOAkgTktJxKRDOBU4CW/Yakikt78GjgHWBHCWI0xbaitb+ShdzcytiCTCYVZXocTNkSEcQU9WVwSfQ/tC1mCUNVG4GbgdWA18KyqrhSRm0TkJr9JLwbeUNVav2G9gQ9FZCkwH/i3qr4WqliNMW17ZsE2KvfXc8cFx3kdStgZX5jJpspa9kTZheqQtk9T1bnA3BbDZrR4/xjwWIthm4CxoYzNGBO8TRX7ue+1NUwo7ElxYU+vwwk7Y/MzAVhZVs1JQ6KnZZfdSW2MadNPZi8lRuDXl45GJFD7k+7tuL49AFi1Y5/HkXQuSxDGmKP6YH0Fi0v2ctvU4Qzule51OGEpKzWBvhlJrCyLrgvVliCMMa1SVX7/5jryMpOZPrG/1+GEtZH9erDKEoQxprt4e005i0v2cvMZg7tNT3EdNaJvDzZW7I+qviEsQRhjWvXvZTtIT4zj8gn5XocS9gb1SsOnsLXqgNehdBpLEMaYgHZV1/Hiku1cOLYvcbG2q2jLgJxUALZU1bYxZeSwb90YE9Dc5TvwKVx/0gCvQ4kIhdlugqi0BGGMiWI+n/LkvK2M7NfDWi4FKSM5nqzUBKtBGGOi25urd7GxopYbTxnodSgRpSg7hc1WgzDGRCtV5f/e3UhBVjIXjO7rdTgRpSgnlS2VdpHaGBOllpbuY8m2vdx48kC7ON1ORdmp7Kyu42BDdDR1tW/fGHOYqvLUvK0kxsVw0fEB+/cyR1HktmTaujs6TjNZgjDGHPant9Yze1Ep0ycW0CMp3utwIs6AKGvJZAnCGANAQ6OPJ+eVcPqwXO6aNtLrcCJSYU4KAJuj5DqEJQhjDAAvLt5O5f56rj2xiJgYe2JrR/RIiic7NYGtUdLU1RKEMYaKmnrueWUVo/J6cNpQ69v9WBTlpEZNU1dLEMYY
fvv6WvbXN/LDM4dafw/HqCg7NWpulrMEYUw3t7mylmcXbWPa2H6cNaK31+FEvAE5KeyqrudAQ6PXoRwzSxDGdHOzF25DFX589lCvQ4kKzc9kioanulqCMKYb8/mU99dXMDY/4/DTSM2xOfxU1yi4DmEJwphu7K015azYXs0V1ltcp2m+WW5zFFyHCGmCEJGpIrJWRDaIyO0Bxt8qIkvcvxUi0iQiWcGUNcYcuw/XV5AQG8OlE+yu6c6SlhhHVmoCpXsOeh3KMQtZghCRWOBB4DxgBHCliIzwn0ZV71fVcao6Dvgp8J6q7g6mrDHm2KzdWcMT87Zy3ug+JMZZd6KdKb9nMtt22zWIo5kEbFDVTaraAMwCLjrK9FcCMztY1hjTTi8u2Y5P4Y4L7Nirsw3MSWXdrhqvwzhmoUwQecA2v/el7rAvEJEUYCrwXAfK3igiC0VkYUVFxTEHbUx3sG5XDX//cDMXjOlLbnqi1+FEndH5meyqrqdyf73XoRyTUCaIQHfbaCvTTgM+UtXd7S2rqg+rarGqFufm2h2gxgTjx88uISUhlrumWe0hFPJ7JgOwc1+dx5EcmzYThIhcKCIdSSSlQIHf+3ygrJVpp/P56aX2ljXGtMOH6ytZsb2aH5w5hF7pSV6HE5V6ubWyXdVRniBwdt7rReQ3InJcO+a9ABgiIgNEJMGdz5yWE4lIBnAq8FJ7yxpj2qfJp9w5ZwU9U+K5aJy1XAqV3j2cxFteE9mnmOLamkBVrxGRHjgXkf8hIgr8A5ipqq1ehVHVRhG5GXgdiAUeVdWVInKTO36GO+nFwBuqWttW2Y6tojGm2Rsrd7KpopYHrjqerNQEr8OJWjlpTg2ivDrKEwSAqlaLyHNAMvBDnJ36rSLyZ1X9y1HKzQXmthg2o8X7x4DHgilrjOk4VeWhdzcyICeV80ZZX9OhlBAXQ1ZqAuU1UX6KSUSmicgLwNtAPDBJVc8DxgK3hDg+Y0wn+XBDJcu37+OmUwcSa/09hFyv9MSIvwYRTA3icuAPqvq+/0BVPSAi/xWasIwxnW3u8h30SIrj4uPzvQ6lWyjISon4joOCuUh9FzC/+Y2IJItIEYCqvhWiuIwxnehAQyNvrS5nYlEWCXH2CLauUJSdwtaqA/h8rbXuD3/BbCmzAZ/f+yZ3mDEmQjzxyVbKa+q56bRBXofSbRRmp1Lf6IvolkzBJIg493EXALivrfmDMRGiodHHPz7awomDsplYlOV1ON1GkdsvRCT3LhdMgqgQkS83vxGRi4DK0IVkjOlMf3l7PTur6/jmyQO8DqVbKcxOAYjo6xDBXKS+CXhKRB7AeQTGNuDakEZljOkUn5Xs4S9vb+DsEb05fVgvr8PpVvplJhMfK2yJ4J7lgrlRbiNwgoikAXK0m+OMMeHl0Q83k54Uxx+vGIeINW3tSrExEvEtmYK6UU5ELgBGAknNG5mq3hPCuIwxx2jHvoO8umIn//WlIlITg/qpm05WlJ3KlsrIrUEEc6PcDOAK4Hs4p5guBwpDHJcx5hg9Na8EnyrXTinyOpRuq39WSkR3HBTMReoTVfVaYI+q/g8whSOftGqMCTN1h5p4en4JZx3Xm4KsFK/D6bZy0xOpqW+k7lCT16F0SDAJovle8QMi0g84BFhzCGPC2CvLdrC7toHrTizyOpRuLdt9IGJVbUMbU4anYBLEyyKSCdwPfAZs4ci+G4wxYURV+cdHmxnaO40pg7K9Dqdba36qa2WE3ix31CtXbkdBb6nqXuA5EXkFSFLVfV0RnDGm/ZaW7mNlWTX/e/Eoa7nksRy346BI7Xr0qDUIVfUBv/N7X2/JwZjw9ummKgCmjuzjcSQmJ805xRSVCcL1hohcKnYoYkzYU1VeXlbGkF5pZLunN4x3Dp9i2h+Z1yCCaRz9YyAVaBSROpymrqqqPUIamTGm3d5YtYsV26v59aWjvQ7FAEnxsaQlxlERjdcgAFQ1vSsCMcYcG59PueflVRRlp/CV462/
6XCRk5YQsaeY2kwQInJKoOEtOxAyxnjr7x9uZvveg/zhirEkxsV6HY5x5aQlRm+CAG71e50ETAIWAWeEJCJjTLtV1NRz32tryE5N4KzjensdjvGTk5bIhor9XofRIcGcYprm/15ECoDfhCwiY0y7vb1mF00+5fHrJ5GeFO91OMZPbnoi8zZXeR1Gh3Sk78FSYFQwE4rIVBFZKyIbROT2VqY5TUSWiMhKEXnPb/gWEVnujlvYgTiN6TbeXFVOXmYyI/pa25Fwk5OWyN4Dh2ho9LU9cZgJ5hrEX4DmTlVjgHHA0iDKxQIPAmfjJJUFIjJHVVf5TZMJPARMVdUSEWn5wPrTVdU6JzLmKNburOH9dRVcNbm/3RgXhnLdm+Wqauvpm5HscTTtE8w1CP+j90Zgpqp+FES5ScAGVd0EICKzgIuAVX7TXAU8r6olAKpaHlTUxhgAqusO8Z2nFpGeFMfNZwz2OhwTQHOCqKiJzgTxL6BOVZvAqRmISIqqtvUM2zyc3uealQKTW0wzFIgXkXeBdOBPqvq4O05xbtJT4K+q+nCghYjIjcCNAP379w9idYyJHk/NK2FjRS1PXD/p8E1ZJrxE8t3UwVyDeAvwT3vJwH+CKBeorqst3scBE4ALgHOBn4vIUHfcl1R1PHAe8N2jNLd9WFWLVbU4Nzc3iLCMiQ6qynOflVJc2JOTh9i2H678axCRJpgEkaSqh9toua+DecB8KUf2G5EPlAWY5jVVrXWvNbwPjHWXU+b+LwdewDllZYxxvbJsBxvK9/PVidY9SzjLTU9EBMr21rU9cZgJJkHUisj45jciMgE4GES5BcAQERkgIgnAdGBOi2leAk4WkTgRScE5BbVaRFJFJN1dXipwDrAiiGUa0y2oKk/M20rfjCQuG5/vdTjmKBLjYumXkUxJBPYsF8w1iB8Cs0Wk+ei/L04XpEelqo0icjPwOhALPKqqK0XkJnf8DFVdLSKvAcsAH/CIqq4QkYHAC26LjDjgaVV9rZ3rZkzUen99JfM37+aXXxlFTIy1XAp3BVnJEdn1aDA3yi0QkeHAMJzrCmtU9VAwM1fVucDcFsNmtHh/P05nRP7DNuGeajLGfNG7a8tJiIvhsglWe4gEOWmJrCqr9jqMdmvzFJOIfBdIVdUVqrocSBOR74Q+NGNMIHWHmnhh8XbOOq4XSfH2zKVIEKnPYwrmGsQNbo9yAKjqHuCGkEVkjDmqOUvK2HvgENdOKfI6FBOkrNQEqusaI+5u6mASRIx/Z0HuHdIJoQvJGNMaVeWxj7cwrHc6kwdkeR2OCVKfHkkA7KqOrJZMwSSI14FnReRMETkDmAm8GtqwjDGB/OOjLazaUc21JxbaYzUiSH6WcytZpF2oDiZB3IZzs9y3ge/itDiKrPvFjYkCVfvr+fPb60mOj+Ur46xDoEhS0NO5dSzSmrq2mSBU1QfMAzYBxcCZwOoQx2WMaeG5z0rZe+AQs2+aQmpiMC3UTbjom5FEXIywbU9kJYhWtzL3kRfTgSuBKuAZAFU9vWtCM8Y08/mUJ+eVMCqvB6PyMrwOx7RTXGwM/TKT2bY7mHuMw8fRahBrcGoL01T1JFX9C9DUNWEZY/wt3LqHkt0HuOHkgV6HYjqoICs54moQR0sQlwI7gXdE5G8iciaBH8BnjAmxRz7YRFJ8jHUnGsEKeqZEz0VqVX1BVa8AhgPvAj8CeovI/4nIOV0UnzHd3pqd1byxahffPGmgXXuIYAVZKVTub+BAQ6PXoQQtmIvUtar6lKpeiPNE1iVAwO5DjTGdy+dTvvf0YnqmxPP1E4u8Dsccg4IspyVT6Z7IuQ7Rrj6pVXW3qv5VVc8IVUDGmM+9t66C9eX7uWvayMP9CpjIlN/TuTtge7QmCGNM1znY0MSvX1tD34wkLhjT1+twzDHqFYEdB9kJTWPC1J0vrWDtrhoe/fpE4mPtWC7SNXcJW14TOY/bsK3OmDD08cZKZi8q5TunDeL04b28Dsd0gqT4
WHokxUVUDcIShDFhprymjp8+v5z8nsl874whXodjOlFueiIVEfTYbzvFZEyYefDtDZTtPcisG6dYfw9Rpld6ktUgjDEdU7W/nic/LWHa2H5MKOzpdTimk+WmJ1JuCcIY0xEvLN5Ok0+57sQBXodiQiA3PdFqEMaY9lNVnv60hIlFPRmV18PrcEwI5KYncqChidr6yLib2hKEMWFi1Y5qNlXWcsn4fOsMKErlHm7qGhm1iJAmCBGZKiJrRWSDiAR8PIeInCYiS0RkpYi8156yxkSTl5fuIC5GmDqyj9ehmBDJjbCb5ULWisntu/pB4GygFFggInNUdZXfNJnAQ8BUVS0RkV7BljUmmqgqrywr46QhOfRMtS7fo1WvHpGVIEJZg5gEbFDVTaraAMwCLmoxzVXA86paAqCq5e0oa0zUePSjLZTuOciXx/bzOhQTQs2nmCoi5G7qUCaIPGCb3/tSd5i/oUBPEXlXRBaJyLXtKAuAiNwoIgtFZGFFRUUnhW5M13l3bTm/eGUVU0f2sQQR5XqmJBAbIxFzDSKUN8oFusqmAZY/AafnumTgExGZF2RZZ6Dqw8DDAMXFxQGnMSZcqSp/ems9BVnJ/HH6OOLsmUtRLSZGyE5NoDJC7qYOZYIoBQr83ucDZQGmqVTVWqBWRN4HxgZZ1piI91nJHhaX7OWei0baXdPdRG56IpX7G7wOIyihPFxZAAwRkQEikgBMB+a0mOYl4GQRiRORFGAysDrIssZEvIff30RGcjyXTcj3OhTTRXLSIudmuZDVIFS1UURuBl4HYoFHVXWliNzkjp+hqqtF5DVgGeADHlHVFQCByoYqVmO88MnGKl5fuYsfnjWElAR7LFp3kZueyLpdNV6HEZSQbpWqOheY22LYjBbv7wfuD6asMdFiT20D35+1mKzUBG46dZDX4ZgulJOWSOX+elQ17G+ItCtixnjgn59soaKmnlvOGWbXHrqZ3PREDjUpew8c8jqUNlmCMKaLqSpvrNzF6LwMrprc3+twTBfr0yMJgJ3V4X8vhCUIY7rYf1aXs2pHNV+bUuh1KMYDfTOdBLFj30GPI2mbJQhjupDPp/z+zXUUZqdwyfEB7/00US4vMxmA7XutBmGMcTX5lO/NXMzqHdV8/4whdlNcN5WTlkhcjLBjb/jXIKxtnTFdoLHJx89fWsm/l+/g+2cO4ZLxVnvormJjhD4ZSZRZgjDGHGry8bPnlzN7USknD8nhB2cOCfvmjSa0+mUkU7bPTjEZ0+3d//paZi8q5ftnDuHx/5pEbIwlh+6ub2aSXaQ2prvbse8gj364mXNH9ubHZw+1moMBoF9mMjv31eHzhffzRS1BGBMiqsoPZi2h0adcf9JAr8MxYaRX881yB8P7ZjlLEMaEyLvrKpi/eTd3TRvBpAFZXodjwkiO23FQuD/22xKEMSGw7+Ahbn9uGf2zUrh6st0QZ44UKQnCWjEZEwIPvL2eXdX1PPftKSTE2XGYOVJOmtPveLj3C2FbrjGdbM7SMv72wWamTyxgQqGdWjJfdLgGEeb9QliCMKYTfbi+kv/+11KO75/JL78yyutwTJjKSI4nLkaoqrUEYUzUU1X++t5Grvn7pyTHx/LgVePtURqmVTExQlZqApU1dorJmKj39w83c++raxiV14OnbziBfu4D2YxpTU5aYtjXIOwitTHHaE9tA795bS1nj+jNX6+ZQIzdKW2CkJOeSIVdpDYmem2q2M83HluAT5WfnDPUkoMJWk5qQthfpLYahDEdNGdpGT96ZgkC/Ori0Qzv08PrkEwE6Z2RRHlNHU0+Ddvnc1mCMKad6hub+Nv7m/jdm+sYnZfB364tprfbjaQxwSrMSuFQk7Jj30Hye6Z4HU5AIT3FJCJTRWStiGwQkdsDjD9NRPaJyBL3706/cVtEZLk7fGEo4zQmWIeafHx1xif89o11XDimH7NuPMGSg+mQwuxUALZWHfA4ktaFrAYhIrHAg8DZQCmwQETmqOqq
FpN+oKoXtjKb01W1MlQxGtNef/9wM0tL9/HLr4zi6sn97emspsMKs51aw5aqWr40OMfjaAILZQ1iErBBVTepagMwC7gohMszJmRUlQff2cDv3ljLuSN7c80JhZYczDHp0yOJhLiYsK5BhDJB5AHb/N6XusNamiIiS0XkVREZ6TdcgTdEZJGI3NjaQkTkRhFZKCILKyoqOidyY/ws2LKbm59ezP2vr2VcQSb3XTLG65BMFIiJEQqzUthaVet1KK0K5UXqQIdXLXvH+AwoVNX9InI+8CIwxB33JVUtE5FewJsiskZV3//CDFUfBh4GKC4uDu/eN0xE8fmUu+as5Il5W0mIjeGK4gJ+dcnosG1xYiJPYXZqWNcgQpkgSoECv/f5QJn/BKpa7fd6rog8JCI5qlqpqmXu8HIReQHnlNUXEoQxoeDzKbf8aynPf7ad80f34b5Lx9AjKd7rsEyUKcxO4cMNFahqWJ6yDOUppgXAEBEZICIJwHRgjv8EItJH3E9FRCa58VSJSKqIpLvDU4FzgBUhjNWYw3w+5Rf/XsXzn23nB2cO4cGrxltyMCFRlJ1C3SEf5WF6w1zIahCq2igiNwOvA7HAo6q6UkRucsfPAC4Dvi0ijcBBYLqqqoj0Bl5wc0cc8LSqvhaqWI1ppqrcPPMz5i7fydenFPLDs4aE5ZGdiQ7+TV3Dsbl0SG+UU9W5wNwWw2b4vX4AeCBAuU3A2FDGZkwgz322nbnLd3LWcb25+8sjLTmYkPJv6hqO3dLas5iMcb21ehe3zF7K2PwMHv7aBEsOJuTyMpOJi5GwbclkCcIYYEP5fn4wawkDc1OZ8TV7IqvpGnGxMeT3TGZLmLZksgRhur3NlbXc+PhCkuJjePL6yfTNsL4cTNfpn51KSZgmCHtYn+nWnltUyu3PL0MVnvrmZOvox3S5ouwUFpfsCcumrlaDMN3WW6t38ZPZS8lNS2TOzScxeWC21yGZbqgoO5WaukYq9odfU1dLEKZb+ufHW/jm4wsZ0iuNt285jRH9rC8H443mbW9VWXUbU3Y9SxCmWznU5OOP/1nH3S+v5NShuTx9wwkkxcd6HZbpxob0SgOca2Hhxq5BmG6jyadc/cinzN+8m8kDsnjo6vGkJNhPwHgrKzWBpPgYtu856HUoX2C/DtMtVO6v575X1zB/827uvWQ0V07q73VIxgAgIuT3TGHr7vBryWQJwkS9RVt3c9mMTwD47umDmD6xoI0SxnStEX17sGjrHq/D+AK7BmGi2sPvb+SyGZ+gCn+9ZgK3njs87JoSGjMmP4Ptew9SGWYtmawGYaKOz6fMXrSNp+dvY+m2vYzJz+CRrxfTKz38HoZmDMCovAwAlm/fx+nDenkczecsQZio8vaaXdzxwgrK9tVRkJXMrecO44aTB5IQZ5VlE75G9uuBCCwvtQRhTKfz+ZTXV+7kB88socmn/ObSMVxenG+nk0xESE+KZ2BOKstK93kdyhEsQZiIt2jrHn7+4gpW7ahmWO90nvnWCWSmJHgdljHtMjovg3mbdnsdxhEsQZiI09jkY0VZNVuravnXolI+WF9Jr/REfnf5WC4c25fEOLvxzUSe4/r24MUlZew7cIiMlPDowdAShIkY762r4B8fbebdtRWHh/XLSOJHZw3l6hP6k5OW6GF0xhyboX3SAVhXXsPEovDoPMgShAlrs+aXMHPBNg7UN7K+fD+JcTFcO6WQYX3SGdUvg1F5GcRa3w0mCgzr7SSINTstQRgTUJNP+XRTFXNX7GDnvnr+s3oXyfGxnDwkh/NG9+XGUwaSlmibrYk+fTOSSE+MY93OGq9DOcx+acZTjU0+duyr44l5W3l/XQVr/H4ceZnJXDC6L7+6ZDQZyeFxTtaYUBERhvZJZ/n28GnJZAnChFzdoSb21zfiU2X+5t0s376P11bsZN/BQxxoaKKh0Qc4jxv47umDKMxOZdqYfiQn2MVm072cPaI39726hvfWVXDq0FyvwwltghCR
qcCfgFjgEVW9r8X404CXgM3uoOdV9Z5gyprwoaqsLKumqraBlWX78PkUVdhRXcfy0n2sL6+h7pDv8PQicPKQXAZkpxAXG0PvHomcMjSX4X2sTwbTvX3thEJmzi/hZ88v571bTyMu1tsbPEOWIEQkFngQOBsoBRaIyBxVXdVi0g9U9cIOlu0UH66vpEk1FLP2jKpSXl1Poy/49Wry+dhVXY/yeZmGRh8VNfVs23OQnfvqvrCMndV1tLaItMQ4hvVJ56vFBQzulYYAA3PTGN+/p9UOjAkgNTGOOy4YwQ2PL+SOF1dw36VjPI0nlDWIScAGVd0EICKzgIuAYHbyx1K23W54fCEHDzWFYtYRRwRi/e4+FoFe6UlkpSZwwsBsWt6Y3DMlnpSEOHLSEhjUK40RfXuQ6l5EjosRu5PZmHY667heXD25P099WoIq9M1M4vLiAvI86C89lAkiD9jm974UmBxguikishQoA25R1ZXtKIuI3AjcCNC/f8ee8f/0DZNbPQqOZJkp8e1u8ZOdmuB5tdaY7kxE+PmFI9ixr45/L99BbUMjD72zkcLslFbL9ExJ4NmbpnR6LKFMEIEOHVvuhj8DClV1v4icD7wIDAmyrDNQ9WHgYYDi4uIO7eaP79+zI8WMMSYkkuJjefQbEwEoqTrAX95eT21DY6vT90gKTSu/UCaIUsC/Z5Z8nFrCYapa7fd6rog8JCI5wZQ1xpjuoH92CvdfPtaTZYfyXMICYIiIDBCRBGA6MMd/AhHpI+5JahGZ5MZTFUxZY4wxoRWyGoSqNorIzcDrOE1VH1XVlSJykzt+BnAZ8G0RaQQOAtNVVYGAZUMVqzHGmC8SjaLmncXFxbpw4UKvwzDGmIghIotUtTjQOGuuYowxJiBLEMYYYwKyBGGMMSYgSxDGGGMCsgRhjDEmoKhqxSQiFcDWDhbPASo7MZxIYOvcPdg6R79jWd9CVQ34bPGoShDHQkQWttbUK1rZOncPts7RL1Tra6eYjDHGBGQJwhhjTECWID73sNcBeMDWuXuwdY5+IVlfuwZhjDEmIKtBGGOMCcgShDHGmIC6VYIQkakislZENojI7QHGi4j82R2/TETGexFnZwpina9213WZiHwsIt70TNKJ2lpnv+kmikiTiFzWlfGFQjDrLCKnicgSEVkpIu91dYydLYhtO0NEXhaRpe46X+dFnJ1JRB4VkXIRWdHK+M7dh6lqt/jD6VdiIzAQSACWAiNaTHM+8CpOl6cnAJ96HXcXrPOJQE/39XndYZ39pnsbmAtc5nXcXfA9ZwKrgP7u+15ex90F6/wz4Nfu61xgN5DgdezHuN6nAOOBFa2M79R9WHeqQUwCNqjqJlVtAGYBF7WY5iLgcXXMAzJFpG9XB9qJ2lxnVf1YVfe4b+fhdO8ayYL5ngG+BzwHlHdlcCESzDpfBTyvqiUAqhrp6x3MOiuQ7vZamYaTIFrv2DkCqOr7OOvRmk7dh3WnBJEHbPN7X+oOa+80kaS963M9ztFHJGtznUUkD7gYmNGFcYVSMN/zUKCniLwrIotE5Nouiy40glnnB4DjcPqzXw78QFV9XROeZzp1HxayLkfDkAQY1rKNbzDTRJKg10dETsdJECeFNKLQC2ad/wjcpqpNbpfokS6YdY4DJgBnAsnAJyIyT1XXhTq4EAlmnc8FlgBnAIOAN0XkA1WtDnFsXurUfVh3ShClQIHf+3ycI4v2ThNJglofERkDPAKcp6pVXRRbqASzzsXALDc55ADni0ijqr7YJRF2vmC37UpVrQVqReR9YCwQqQkimHW+DrhPnZPzG0RkMzAcmN81IXqiU/dh3ekU0wJgiIgMEJEEYDowp8U0c4Br3ZYAJwD7VHVHVwfaidpcZxHpDzwPfC2Cjyb9tbnOqjpAVYtUtQj4F/CdCE4OENy2/RJwsojEiUgKMBlY3cVxdqZg1rkEp8aEiPQGhgGbujTKrtep+7BuU4NQ1UYRuRl4HacFxKOqulJEbnLH
z8Bp0XI+sAE4gHMEErGCXOc7gWzgIfeIulEj+CmYQa5zVAlmnVV1tYi8BiwDfMAjqhqwqWQkCPJ7/gXwmIgsxzn1cpuqRvQjwEVkJnAakCMipcBdQDyEZh9mj9owxhgTUHc6xWSMMaYdLEEYY4wJyBKEMcaYgCxBGGOMCcgShDHGmIAsQRjTDiLSR0RmichGEVklInNFZOhRpv+he99BW/O9XERWi8g77vuZ7tM4f9SZ8RvTHtbM1ZgguQ99+xj4Z/P9FCIyDkhX1Q9aKbMFKG6r/b17j8KvVfUdEemD8xTOws6M35j2shqEMcE7HTjkf7Odqi4BYkXkleZhIvKAiHxDRL4P9APe8asZXCkiy0VkhYj82h12J84zsGaIyP3AG0Avt++Gk7ts7YxpwRKEMcEbBSwKdmJV/TPOc3BOV9XTRaQf8Guch8eNAyaKyFdU9R5gIXC1qt4KfBnYqKrjWquZGNMVLEEY03UmAu+qaoWqNgJP4XQAY0xYsgRhTPBW4jwyu6VGjvwtJbVSPiqeLW66D0sQxgTvbSBRRG5oHiAiE3EeFjdCRBJFJAP3CaKuGiDdff0pcKqI5IhILHAlEPF9Q5voZQnCmCC5/QpcDJztNnNdCdyNc53hWZwnpT4FLPYr9jDwqoi84z52+afAOzh9KH+mqi914SoY0y7WzNUYY0xAVoMwxhgTkCUIY4wxAVmCMMYYE5AlCGOMMQFZgjDGGBOQJQhjjDEBWYIwxhgT0P8Hey9nu62gL6cAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "plt.plot(c_range, accuracies)\n",
    "plt.title(\"Effect of changing cutoff on test set accuracy\")\n",
    "plt.ylabel(\"Accuracy\")\n",
    "plt.xlabel(\"Cutoff\")\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "wound-replacement",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
